Map election results to regions.

Assumes you have huffpostdata/election-2012-results cloned at ../../election-2012-results. Does 2000 regions by default; just change county_region_00 to county_region_10 below to do 2010.



In [1]:

    
from __future__ import division, print_function
%matplotlib inline



In [2]:

    
import numpy as np
import pandas as pd
import re
import six



In [3]:

    
from IPython.display import display



In [4]:

    
import sys
sys.path.append('..')



In [5]:

    
from pummeler.data import geocode_data



In [6]:

    
county_to_region = geocode_data('county_region_10').region.to_dict()

Map electoral results to regions



In [7]:

    
from glob import glob

First, handle Alaska specially:



In [8]:

    
assert len({v for k, v in county_to_region.iteritems() if k.startswith('02')}) == 1



In [9]:

    
ak_precincts = pd.read_csv('../../election-2012-results/data/ak_precincts.csv')



In [10]:

    
ak = ak_precincts.groupby(ak_precincts.candidate).sum().reset_index()
ak['state'] = 'ak'
ak['fips'] = next(k for k in county_to_region if k.startswith('02'))
ak['county'] = 'All of Alaska'



In [11]:

    
ak









    Out[11]:






  
    
      
      candidate
      votes
      state
      fips
      county
    
  
  
    
      0
      Johnson
      7392
      ak
      02100
      All of Alaska
    
    
      1
      Obama
      122640
      ak
      02100
      All of Alaska
    
    
      2
      Romney
      164676
      ak
      02100
      All of Alaska
    
    
      3
      Stein
      2917
      ak
      02100
      All of Alaska
    
    
      4
      Write-In
      2870
      ak
      02100
      All of Alaska



In [12]:

    
bits = [ak]
for f in glob('../../election-2012-results/data/??.csv'):
    piece = pd.read_csv(f, dtype={'fips': str})
    piece['state'] = f[-6:-4]
    bits.append(piece)
election = pd.concat(bits)

Normalize candidate names



In [13]:

    
reps = {
    'goode': 'virgil goode',
    'obama': 'barack obama',
    'johnson': 'gary johnson',
    'romney': 'mitt romney',
    'stein': 'jill stein',
    'virgil h. goode': 'virgil goode',
    'virgil h. goode jr.': 'virgil goode',
    'gary e. johnson': 'gary johnson',
    'write in': 'write-in',
    'write-ins': 'write-in',
    'hoefling': 'tom hoefling',
    'obama barack': 'barack obama',
    'stein jill': 'jill stein',
    'romney mitt': 'mitt romney',
    'johnson gary': 'gary johnson',
    'jill stein write-in': 'jill stein',
    'hoefling (write-in)': 'tom hoefling',
    'tom hoeffling': 'tom hoefling',
    'alexander': 'stewart alexander',
    'ross c. &quot;rocky&quot;': 'ross c. "rocky"',
    'ross c. rocky': 'ross c. "rocky"',
    'ross c.': 'ross c. "rocky"',
    'rocky': 'ross c. "rocky"',
    'paul': 'ron paul',
    'ron paul write-in': 'ron paul',
    'write-in**': 'write-in',
    'clymer': 'james clymer',
    'roth': 'cecil james roth',
    'prokopich': 'barbara prokopich',
    'barbara a. prokopich': 'barbara prokopich',
    'kevin m. thorne': 'kevin thorne',
    'thorne': 'kevin thorne',
}
def rewrite(s):
    s = s.lower()
    for x in ['/', ',', '(', ' and', ' for president']:
        p = s.find(x)
        if p != -1:
            s = s[:p]
    s = s.strip().replace('  ', ' ')
    s = reps.get(s, s)
    return s
election['cand'] = election.candidate.apply(rewrite)



In [14]:

    
cand_votes = election.groupby(election.cand).votes.sum().sort_values(ascending=False)



In [15]:

    
cand_votes.head(50)









    Out[15]:





cand
barack obama                65915527
mitt romney                 60936445
gary johnson                 1275791
jill stein                    466587
virgil goode                  114400
write-in                       60128
roseanne barr                  59051
thomas hoefling                38372
ron paul                       24870
ross c. "rocky"                23737
scattering                     18309
anderson                       16990
misc                           13275
richard duncan                 12517
blank                          11578
other write-ins                 8788
barr                            8212
terry                           6880
all others                      6552
good                            6274
none of these candidates        5770
baldwin                         5017
peta lindsay                    4723
christensen                     4433
stewart alexander               4401
james harris                    4091
stevens                         3856
randall terry                   3813
jim carlson                     3149
lindsay                         3067
merlin miller                   2676
jill reed                       2618
randall a. terry                2408
tom hoefling                    2177
sheila tittle                   1767
jerry white                     1540
ross                            1368
gloria la riva                  1236
dean morstad                    1094
jerry litzel                    1027
barbara dale washer             1016
jeff boss                       1007
scatter                          875
barnett                          839
sheila "samm" tittle             791
santa claus                      625
jack fellure                     518
gloria lariva                    372
thomas robert stevens            235
reed                             216
Name: votes, dtype: int64



In [16]:

    
election['party'] = 'oth'
election.loc[election.cand == 'barack obama', 'party'] = 'D'
election.loc[election.cand == 'mitt romney', 'party'] = 'R'
election.loc[election.cand == 'gary johnson', 'party'] = 'L'
election.loc[election.cand == 'jill stein', 'party'] = 'G'



In [17]:

    
election.groupby(election.party).votes.sum()









    Out[17]:





party
D      65915527
G        466587
L       1275791
R      60936445
oth      497625
Name: votes, dtype: int64

Slightly disagrees with https://en.wikipedia.org/wiki/United_States_presidential_election,_2012: they say Obama 65,915,795, Romney 60,933,504. Not sure how we got too many votes for Romney there; maybe Wikipedia miscounted?

Make sure that the FIPS codes are lining up reasonably



In [18]:

    
set(election.fips) - set(county_to_region)









    Out[18]:





{nan}



In [19]:

    
election[pd.isnull(election.fips)]









    Out[19]:






  
    
      
      candidate
      county
      fips
      state
      votes
      cand
      party
    
  
  
    
      16
      Obama
      Overseas
      NaN
      hi
      392
      barack obama
      D
    
    
      17
      Romney
      Overseas
      NaN
      hi
      78
      mitt romney
      R
    
    
      18
      Johnson
      Overseas
      NaN
      hi
      2
      gary johnson
      L
    
    
      19
      Stein
      Overseas
      NaN
      hi
      4
      jill stein
      G
    
    
      128
      Johnson, Gary E.
      STATE UOCAVA
      NaN
      me
      58
      gary johnson
      L
    
    
      129
      Obama, Barack
      STATE UOCAVA
      NaN
      me
      2071
      barack obama
      D
    
    
      130
      Romney, Mitt
      STATE UOCAVA
      NaN
      me
      858
      mitt romney
      R
    
    
      131
      Stein, Jill
      STATE UOCAVA
      NaN
      me
      51
      jill stein
      G
    
    
      132
      Anderson, Ross C.
      STATE UOCAVA
      NaN
      me
      2
      anderson
      oth
    
    
      133
      Paul, Ronald E.
      STATE UOCAVA
      NaN
      me
      14
      ron paul
      oth
    
    
      134
      Reed, Jill Ann
      STATE UOCAVA
      NaN
      me
      0
      reed
      oth
    
    
      135
      BLANK
      STATE UOCAVA
      NaN
      me
      13
      blank
      oth

UOCAVA = The Uniformed and Overseas Citizens Absentee Voting Act. Ignore these.



In [20]:

    
{fips for fips in set(county_to_region) - set(election.fips)
 if not fips.startswith('02')}









    Out[20]:





{'15005'}

15005 is Kalawao County, Hawaii, which has a population of 89 and is accessible only by mule trail. Its votes are counted under Maui (15009), and they're in the same PUMA anyway:



In [21]:

    
county_to_region['15005'] == county_to_region['15009']









    Out[21]:





True

Do the actual grouping



In [22]:

    
election_region = election.groupby(election.fips.map(county_to_region)) \
                          .apply(lambda x: x.votes.groupby(x.party).sum()).unstack()



In [23]:

    
election_region.index.name = 'region'
election_region.columns = ['votes_{}'.format(p) for p in election_region.columns]



In [24]:

    
election_region.fillna(0, inplace=True)
election_region = election_region.astype('int')



In [25]:

    
election_region.head()



In [26]:

    
election_region.to_csv('2012-by-region.csv.gz', compression='gzip')

	votes_D	votes_G	votes_L	votes_R	votes_oth
region
AK_00_01	122640	2917	7392	164676	2870
AL_00_01	18424	169	607	66016	275
AL_00_02	75150	116	301	62561	227
AL_00_03	15328	175	536	71439	402
AL_00_04	37374	211	631	74343	432

	candidate	votes	state	fips	county
0	Johnson	7392	ak	02100	All of Alaska
1	Obama	122640	ak	02100	All of Alaska
2	Romney	164676	ak	02100	All of Alaska
3	Stein	2917	ak	02100	All of Alaska
4	Write-In	2870	ak	02100	All of Alaska

	candidate	county	fips	state	votes	cand	party
16	Obama	Overseas	NaN	hi	392	barack obama	D
17	Romney	Overseas	NaN	hi	78	mitt romney	R
18	Johnson	Overseas	NaN	hi	2	gary johnson	L
19	Stein	Overseas	NaN	hi	4	jill stein	G
128	Johnson, Gary E.	STATE UOCAVA	NaN	me	58	gary johnson	L
129	Obama, Barack	STATE UOCAVA	NaN	me	2071	barack obama	D
130	Romney, Mitt	STATE UOCAVA	NaN	me	858	mitt romney	R
131	Stein, Jill	STATE UOCAVA	NaN	me	51	jill stein	G
132	Anderson, Ross C.	STATE UOCAVA	NaN	me	2	anderson	oth
133	Paul, Ronald E.	STATE UOCAVA	NaN	me	14	ron paul	oth
134	Reed, Jill Ann	STATE UOCAVA	NaN	me	0	reed	oth
135	BLANK	STATE UOCAVA	NaN	me	13	blank	oth